python python截取长网页 并发送邮件

葫芦的运维日志

下一篇 搜索 上一篇

浏览量 34549

2019/11/15 12:14


使用 Python 截取长网页(整页截图),将截图保存至本地,然后通过邮件发送

准备工作 安装依赖

yum install chromedriver

yum install https://dl.google.com/linux/direct/google-chrome-stable_current_x86_64.rpm

pip install selenium

核心代码

#!/bin/python
# coding=utf-8
# author: wz
# mail: 277215243@qq.com
# datetime:2019/10/15 12:42 PM
# web: https://www.bthlt.com

from selenium import webdriver
import time
import os.path
import multiprocessing as mp
from selenium.webdriver.chrome.options import Options
from email import encoders
from email.mime.base import MIMEBase
from email.header import Header
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.mime.image import MIMEImage

# Mail configuration shared by the functions below.
# NOTE(review): the account name and password are hard-coded placeholders;
# consider loading real credentials from the environment or a config file
# instead of committing them to source.
mailto_list = ['277215243@qq.com']  # recipient addresses
mail_host = 'smtp.163.com'  # SMTP server the script connects to
mail_user = '******@163.com'  # account name used for the SMTP login
mail_pass = '******'  # password (or authorization code) for the SMTP login
mail_postfix = '163.com'  # mail domain used when building the sender address

def webshot(link='https://www.bthlt.com/', picname='tbc.png',
            save_dir='/data/www/'):
    """Save a full-page screenshot of ``link`` using headless Chrome.

    The page is scrolled down in fixed steps until the bottom is reached,
    then the browser window is resized to the full document size so that a
    single screenshot covers the whole page.

    Args:
        link: URL of the page to capture.
        picname: File name of the PNG written into ``save_dir``.
        save_dir: Directory the screenshot is saved to.

    Returns:
        True if the screenshot file was written, False otherwise. Errors
        raised while loading or capturing the page are printed, not raised.
    """
    scroll_step = 800  # pixels scrolled per iteration
    js_height = "return document.body.clientHeight"

    chrome_options = webdriver.ChromeOptions()
    chrome_options.add_argument('lang=zh_CN.UTF-8')
    chrome_options.add_argument('--headless')
    chrome_options.add_argument('--no-sandbox')
    driver = webdriver.Chrome(options=chrome_options)
    saved = False
    try:
        driver.maximize_window()
        try:
            driver.get(link)
            k = 1
            height = driver.execute_script(js_height)
            # The height is re-read after every step because it may grow
            # while the page loads more content during scrolling.
            while k * scroll_step < height:
                js_move = "window.scrollTo(0,{})".format(k * scroll_step)
                print(js_move)
                driver.execute_script(js_move)
                time.sleep(0.2)
                height = driver.execute_script(js_height)
                k += 1
            # Resize the window to the full document so one screenshot
            # contains the entire page.
            scroll_width = driver.execute_script(
                'return document.body.parentNode.scrollWidth')
            scroll_height = driver.execute_script(
                'return document.body.parentNode.scrollHeight')
            driver.set_window_size(scroll_width, scroll_height)
            target = os.path.join(save_dir, picname)
            # get_screenshot_as_file reports I/O failures through its return
            # value instead of raising, so the result must be checked.
            saved = bool(driver.get_screenshot_as_file(target))
            if saved:
                print("Process {} get one pic !!!".format(os.getpid()))
            else:
                print("Process {} failed to save {}".format(
                    os.getpid(), target))
            # Short pause kept from the original script.
            time.sleep(3)
        except Exception as e:
            print(picname, e)
    finally:
        # Always shut the browser down; otherwise every run leaves a
        # headless Chrome / chromedriver process behind.
        driver.quit()
    return saved

def send_mail(to_list, sub, image_path='/data/www/tbc.png'):
    """Send an HTML mail with the screenshot embedded as an inline image.

    Args:
        to_list: List of recipient addresses.
        sub: Subject line of the mail.
        image_path: Path of the PNG file to embed; it is sent with the
            MIME type image/png.

    Returns:
        True if the mail was handed to the SMTP server, False if reading
        the image or any SMTP step failed (the error is printed).
    """
    from email.utils import formataddr

    # mail_user may already be a complete address; only append the domain
    # when it is a bare account name, otherwise the domain would be doubled.
    if '@' in mail_user:
        sender = mail_user
    else:
        sender = mail_user + '@' + mail_postfix

    msg = MIMEMultipart()
    msg['Subject'] = sub
    # Encode the non-ASCII display name separately so that the address part
    # of the header stays plain ASCII.
    msg['From'] = formataddr((Header('葫芦', 'utf-8').encode(), sender))
    # Address lists in mail headers are comma separated.
    msg['To'] = ', '.join(to_list)
    # The <img> tag refers to the attachment through its Content-ID (<0>).
    body = """
<html lang="en">
    <body>
    <h1>脚本网页截图</h1>
    <hr />
    <br />
    <a href="https://www.bthlt.com">葫芦的运维日志</a>
    <br />
    <img src="cid:0" />
    </body>
    </html>
"""
    msg.attach(MIMEText(body, 'html', 'utf-8'))

    try:
        with open(image_path, 'rb') as f:
            image_data = f.read()
    except (IOError, OSError) as e:
        # A missing screenshot is reported like any other send failure.
        print(e)
        return False

    filename = os.path.basename(image_path)
    mime = MIMEBase('image', 'png', filename=filename)
    mime.add_header('Content-Disposition', 'attachment', filename=filename)
    mime.add_header('Content-ID', '<0>')
    mime.add_header('X-Attachment-Id', '0')
    mime.set_payload(image_data)
    encoders.encode_base64(mime)
    msg.attach(mime)

    server = smtplib.SMTP()
    try:
        server.connect(mail_host)
        server.login(mail_user, mail_pass)
        server.sendmail(sender, to_list, msg.as_string())
        return True
    except Exception as e:
        print(e)
        return False
    finally:
        # Release the socket even when login or sending fails.
        server.close()

if __name__ == '__main__':
    # Script entry point: take the screenshot, mail it, report the outcome
    # and the total elapsed time.
    started_at = time.time()
    webshot()
    delivered = send_mail(mailto_list, '脚本网页截图 并发送邮件')
    print('发送成功' if delivered else '发送失败')
    elapsed = float(time.time() - started_at)
    print("操作结束,耗时:{:.2f}秒".format(elapsed))

-rw-r--r-- 1 root root 2.5M Nov 15 12:46 /data/www/tbc.png

结果

待解决

截图中的中文显示为乱码,该问题当时未能解决

补充已解决

将本地的中文字体文件上传至服务器的 /usr/share/fonts 目录(必要时执行 fc-cache -fv 刷新字体缓存),即可解决截图中的中文乱码问题

葫芦的运维日志

打赏

上一篇 搜索 下一篇
© 冰糖葫芦甜(bthlt.com) 2021 王梓打赏联系方式 陕ICP备17005322号-1